%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This file computes manager FE%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

path(path,'D:\Workdata\702525\MATLAB_AKM\matlab_bgl\'); %path to the matlabBGL files 

clear

diary('I:\Workdata\702525\Alex\MA\AKM\firmyear_panel_over5.log')

%LOAD DATA  
%s=['I:\Workdata\702525\Alex\MA\manager\manager_lbnryearfe.raw'];
s=['I:\Workdata\702525\Alex\MA\manager\firmyear_panel_cvrnr_over5.raw'];
data=importdata(s);

lbnr_old=data(:,1);
year_old=data(:,2);
firmid=data(:,3);
year=data(:,4);
pid=data(:,5);
firmyearid=data(:,6);
y_old=data(:,7);

lagid=[NaN;pid(1:end-1)];
sameid=lagid==pid;
lagfirmid=[NaN;firmid(1:end-1)];
lagfirmid(~sameid)=NaN;
lagfirmid(lagfirmid==-9)=NaN;


N=length(y_old);
sel=~isnan(lagfirmid);

%relabel
firmid_old=firmid;
pid_old=pid;
[firms,m,n]=unique([firmid;lagfirmid(sel)]);
firmid=n(1:N);
lagfirmid(sel)=n(N+1:end);
[pids,m,n]=unique(pid);
pid=n;

%initial descriptive stats
fprintf('\n')
disp('Some descriptive stats:')
s=['# of p-y obs: ' int2str(length(y_old))];
disp(s);
s=['# of workers: ' int2str(max(pid))];
disp(s);
s=['# of firms: ' int2str(max(firmid))];
disp(s);
fprintf('\n')


%FIND CONNECTED SET
%%%%%%%%%%%%%%%%%%%%%
disp('Finding connected set...')
A=sparse(lagfirmid(sel),firmid(sel),1); %adjacency matrix
%make it square
[m,n]=size(A);
if m>n
    A=[A,zeros(m,m-n)];
end
if m<n
    A=[A;zeros(n-m,n)];
end
A=max(A,A'); %connections are undirected

[sindex, sz]=components(A); %get connected sets
idx=find(sz==max(sz)); %find largest set
s=['# of firms: ' int2str(length(A))];
disp(s);
s=['# connected sets:' int2str(length(sz))];
disp(s);
s=['Largest connected set contains ' int2str(max(sz)) ' firms'];
disp(s);
fprintf('\n')
clear A lagfirmid
firmlst=find(sindex==idx); %firms in connected set
sel=ismember(firmid,firmlst);

y_old=y_old(sel); firmid=firmid(sel); pid=pid(sel);
year=year(sel); pid_old=pid_old(sel);  firmid_old=firmid_old(sel);
N=length(y_old); firmyearid=firmyearid(sel);
lbnr_old=lbnr_old(sel); year_old=year_old(sel);

%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%

disp('Relabeling ids again...')
%relabel the firms
[firms,m,n]=unique(firmid);
firmid=n;

%relabel the workers
[pids,m,n]=unique(pid);
pid=n;

%descriptive stats for connected set
fprintf('\n')
disp('Now restricted to the largest connected set');
s=['# of p-y obs: ' int2str(length(y_old))];
disp(s);
s=['# of workers: ' int2str(max(pid))];
disp(s);
s=['# of firms: ' int2str(max(firmid))];
disp(s);
fprintf('\n')

%Relabel firms and workers
firmyearid_old=firmyearid;
[firmyearids,m,n]=unique(firmyearid);
firmyearid=n;

%Convert to firm-year panel

NT=length(lbnr_old);
M=max(pid);
N=max(firmyearid);
% 
% D=sparse(1:NT,pid',1);
% F=sparse(1:NT,firmyearid',1);
% 
% G=F'*D;  %converts to firm-year panel of manager FE
% lbnr=F'*lbnr_old; %firm id
% year=F'*year_old;
% y=F'*y_old;
G=sparse(1:NT,pid',1);
lbnr=lbnr_old;
year=year_old;
y=y_old;


%Run regression

%create firm and year and industry fixed effects
[firmids,m,n]=unique(lbnr);
firmid=n;
J=max(firmid);
FF=sparse(1:NT,firmid',1);

S=speye(J-1);
S=[S;sparse(-zeros(1,J-1))]; 

yrmin=min(year);
R=sparse(1:NT,year-yrmin+1,1); %year effects
R(:,1)=[];

Z=[R];
%Z=[z, exper, voc, high, vapp,  R];
%Z=[R];


X=[G,FF*S,Z];

disp('Running regression...')
tic
xx=X'*X;
xy=X'*y;
L=ichol(xx,struct('type','ict','droptol',1e-2,'diagcomp',.2));
b=pcg(xx,xy,1e-10,1000,L,L');
toc
disp('Done')

%ANALYZE RESULTS
xb=X*b;
r=y-xb;

LV=inv(L);
dof=N-J-M+1-size(Z,2);
var=sum(LV'.*LV,2);
var=(r'*r)/(dof-1)*var;
var=sqrt(var);

clear xx xy L LV

disp('Goodness of fit:')
RMSE=sqrt(sum(r.^2)/dof)
RSS=sum(r.^2)
TSS=sum((y-mean(y)).^2);
R2=1-sum(r.^2)/TSS
adjR2=1-sum(r.^2)/TSS*(N-1)/dof

ahat=b(1:M);
avhat=var(1:M);
avhat=full(avhat);
ghat=b(M+1:M+J-1);
bhat=b(M+J:end);
disp('check for problems with year effects. should report zero')
sum(bhat==0)

pe=G*ahat;
pevar=G*avhat;
fe=FF*S*ghat;
xb=X(:,M+J:end)*bhat;

clear D F X b

disp('Variance-Covariance of worker and firm effs (p-y weighted)');
cov(pe,fe)
disp('Correlation coefficient');
corr(pe,fe)
disp('Means of person/firm effs')
mean([pe,fe])

disp('Full Covariance Matrix of Components')
disp('    y      pe      fe      xb      r')
C=cov([y,pe,fe,xb,r])


%export 
disp('Saving effects...')
out=[lbnr_old year pid_old pe pevar fe xb r y];
s=['I:\Workdata\702525\Alex\MA\manager\manager_fe_over5.txt'];
dlmwrite(s, out, 'delimiter', '\t', 'precision', 16);


diary off;
